sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.35 R6_2.5.1 fastmap_1.1.1 xfun_0.43
## [5] cachem_1.0.8 knitr_1.45 htmltools_0.5.8.1 rmarkdown_2.26
## [9] lifecycle_1.0.4 cli_3.6.2 sass_0.4.9 jquerylib_0.1.4
## [13] compiler_4.3.2 rstudioapi_0.16.0 tools_4.3.2 evaluate_0.23
## [17] bslib_0.7.0 yaml_2.3.8 rlang_1.1.3 jsonlite_1.8.8
First, load the necessary packages.
# load libraries - notes show the install command needed to install (pre installed)
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
##
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(tidyr)
library(grDevices)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library(tibble)
library(gridExtra)
library(tidyr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.16.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
library(GSEABase)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
##
## rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:geneLenDataBase':
##
## unfactor
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
##
## desc
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: XML
## Loading required package: graph
##
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
##
## addNode
## The following object is masked from 'package:circlize':
##
## degree
## The following object is masked from 'package:plyr':
##
## join
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
## The following objects are masked from 'package:zoo':
##
## yearmon, yearqtr
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(stringr)
##
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
##
## boundary
library(GenomicRanges)
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(rrvgo)
biomin <- read.csv("../../output/Biomin_blast_Pocillopora_acuta_best_hit.csv") %>% rename("Pocillopora_acuta_best_hit" = "gene_id" )
geneInfo <- read.csv("../../output/WGCNA/GO_analysis/geneInfo_WGCNA.csv")
biomin_mod <- merge(biomin, geneInfo, by=c("gene_id"), all=F)
head(biomin_mod)
## gene_id accessionnumber.geneID
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 XP_022804785.1
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 P33_g8985
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 JR972076.1
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Gene:g13552
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 aug_v2a.06327.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 PFX18785.1
## definition
## 1 thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 2 Flagellar associated protein
## 3 Acidic skeletal organic matrix protein (Acidic SOMP)
## 4 Acidic SOMP (Full-Length p27)
## 5 SAARP3
## 6 Mucin-4 [Stylophora pistillata]
## Ref X seqnames start
## 1 Peled et al., 2020 224 Pocillopora_acuta_HIv2___Sc0000021 2329764
## 2 Drake et al., 2013 604 Pocillopora_acuta_HIv2___Sc0000013 2026351
## 3 Ramos-Silva et al., 2013 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 4 Mummadisetti et al., 2021 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 5 Takeuchi et al., 2016 1049 Pocillopora_acuta_HIv2___Sc0000004 7860395
## 6 Peled et al., 2020 1240 Pocillopora_acuta_HIv2___Sc0000005 2364120
## end width strand source type score.x phase
## 1 2349234 19471 + AUGUSTUS transcript NA NA
## 2 2032291 5941 - AUGUSTUS transcript NA NA
## 3 7864189 3795 + AUGUSTUS transcript NA NA
## 4 7864189 3795 + AUGUSTUS transcript NA NA
## 5 7864189 3795 + AUGUSTUS transcript NA NA
## 6 2382635 18516 + AUGUSTUS transcript NA NA
## ID
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## transcript_id seed_ortholog evalue
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 45351.EDO38228 1.51e-306
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 6087.XP_002163848.1 4.11e-211
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 <NA> NA
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 <NA> NA
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 <NA> NA
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 <NA> NA
## score.y
## 1 851
## 2 607
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## eggNOG_OGs
## 1 COG0695@1|root,COG1249@1|root,KOG1752@2759|Eukaryota,KOG4716@2759|Eukaryota,38BHT@33154|Opisthokonta,3BBN5@33208|Metazoa
## 2 28N88@1|root,2QUTJ@2759|Eukaryota,39ZRG@33154|Opisthokonta,3BMP5@33208|Metazoa
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## max_annot_lvl COG_category Description
## 1 33208|Metazoa C thioredoxin-disulfide reductase activity
## 2 33208|Metazoa - -
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## Preferred_name
## 1 TXNRD1
## 2 -
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## GOs
## 1 GO:0000003;GO:0000302;GO:0000305;GO:0001650;GO:0001704;GO:0001707;GO:0001887;GO:0001890;GO:0003006;GO:0003674;GO:0003824;GO:0004791;GO:0005488;GO:0005515;GO:0005575;GO:0005622;GO:0005623;GO:0005634;GO:0005654;GO:0005730;GO:0005737;GO:0005739;GO:0005783;GO:0005829;GO:0006082;GO:0006139;GO:0006518;GO:0006520;GO:0006575;GO:0006725;GO:0006732;GO:0006733;GO:0006739;GO:0006749;GO:0006753;GO:0006790;GO:0006793;GO:0006796;GO:0006807;GO:0006950;GO:0006979;GO:0007154;GO:0007165;GO:0007275;GO:0007369;GO:0007498;GO:0008150;GO:0008152;GO:0008283;GO:0009056;GO:0009069;GO:0009117;GO:0009611;GO:0009628;GO:0009636;GO:0009653;GO:0009790;GO:0009888;GO:0009987;GO:0010035;GO:0010038;GO:0010269;GO:0010941;GO:0010942;GO:0012505;GO:0015036;GO:0015949;GO:0016043;GO:0016174;GO:0016209;GO:0016259;GO:0016491;GO:0016651;GO:0016667;GO:0016668;GO:0016999;GO:0017001;GO:0017144;GO:0018996;GO:0019216;GO:0019222;GO:0019362;GO:0019637;GO:0019725;GO:0019752;GO:0022404;GO:0022414;GO:0022607;GO:0023052;GO:0031974;GO:0031981;GO:0032501;GO:0032502;GO:0033554;GO:0033797;GO:0034599;GO:0034641;GO:0036295;GO:0036296;GO:0036477;GO:0042221;GO:0042303;GO:0042395;GO:0042493;GO:0042537;GO:0042592;GO:0042737;GO:0042743;GO:0042744;GO:0042802;GO:0042803;GO:0043025;GO:0043167;GO:0043169;GO:0043226;GO:0043227;GO:0043228;GO:0043229;GO:0043231;GO:0043232;GO:0043233;GO:0043436;GO:0043603;GO:0043933;GO:0044085;GO:0044237;GO:0044238;GO:0044248;GO:0044281;GO:0044297;GO:0044422;GO:0044424;GO:0044428;GO:0044444;GO:0044446;GO:0044452;GO:0044464;GO:0045340;GO:0045454;GO:0046483;GO:0046496;GO:0046688;GO:0046872;GO:0046914;GO:0046983;GO:0048332;GO:0048513;GO:0048518;GO:0048522;GO:0048598;GO:0048608;GO:0048646;GO:0048678;GO:0048729;GO:0048731;GO:0048856;GO:0050664;GO:0050789;GO:0050794;GO:0050896;GO:0051186;GO:0051187;GO:0051259;GO:0051262;GO:0051716;GO:0055086;GO:0055093;GO:0055114;GO:0061458;GO:0065003;GO:0065007;GO:0065008;GO:0070013;GO:0070276;GO:0070482;GO:0070887;GO:0070995;GO:0071241;GO:0071248;GO:0071280;GO:0071453;GO:0071455;GO:0071704;GO:0071840;GO:0072524;GO:0072593;GO:0080090;GO:0097237;GO:0097458;GO:0098623;GO:0098625;GO:0098626;GO:0098754;GO:0098869;GO:1901360;GO:1901564;GO:1901605;GO:1901700;GO:1990748
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## EC KEGG_ko KEGG_Pathway
## 1 1.8.1.9 ko:K22182 ko00450,ko05200,ko05225,map00450,map05200,map05225
## 2 - - -
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC
## 1 - R03596,R09372 RC02518,RC02873 ko00000,ko00001,ko01000 -
## 2 - - - - -
## 3 <NA> <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA>
## CAZy BiGG_Reaction PFAMs KEGG_new
## 1 - - Glutaredoxin,Pyr_redox_2,Pyr_redox_dim K22182
## 2 - - - <NA>
## 3 <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA>
## 6 <NA> <NA> <NA> K22017
## substanceBXH geneSymbol
## 1 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 Pocillopora_acuta_HIv2___Sc0000021
## 2 Pocillopora_acuta_HIv2___RNAseq.g11609.t1 Pocillopora_acuta_HIv2___Sc0000013
## 3 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 4 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 5 Pocillopora_acuta_HIv2___RNAseq.g13172.t1 Pocillopora_acuta_HIv2___Sc0000004
## 6 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 Pocillopora_acuta_HIv2___Sc0000005
## moduleColor
## 1 brown
## 2 turquoise
## 3 red
## 4 red
## 5 red
## 6 pink
## GO.terms
## 1 GO:0000003,GO:0000302,GO:0000305,GO:0001650,GO:0001704,GO:0001707,GO:0001887,GO:0001890,GO:0003006,GO:0003674,GO:0003824,GO:0004791,GO:0005488,GO:0005515,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005654,GO:0005730,GO:0005737,GO:0005739,GO:0005783,GO:0005829,GO:0006082,GO:0006139,GO:0006518,GO:0006520,GO:0006575,GO:0006725,GO:0006732,GO:0006733,GO:0006739,GO:0006749,GO:0006753,GO:0006790,GO:0006793,GO:0006796,GO:0006807,GO:0006950,GO:0006979,GO:0007154,GO:0007165,GO:0007275,GO:0007369,GO:0007498,GO:0008150,GO:0008152,GO:0008283,GO:0009056,GO:0009069,GO:0009117,GO:0009611,GO:0009628,GO:0009636,GO:0009653,GO:0009790,GO:0009888,GO:0009987,GO:0010035,GO:0010038,GO:0010269,GO:0010941,GO:0010942,GO:0012505,GO:0015036,GO:0015949,GO:0016043,GO:0016174,GO:0016209,GO:0016259,GO:0016491,GO:0016651,GO:0016667,GO:0016668,GO:0016999,GO:0017001,GO:0017144,GO:0018996,GO:0019216,GO:0019222,GO:0019362,GO:0019637,GO:0019725,GO:0019752,GO:0022404,GO:0022414,GO:0022607,GO:0023052,GO:0031974,GO:0031981,GO:0032501,GO:0032502,GO:0033554,GO:0033797,GO:0034599,GO:0034641,GO:0036295,GO:0036296,GO:0036477,GO:0042221,GO:0042303,GO:0042395,GO:0042493,GO:0042537,GO:0042592,GO:0042737,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0043025,GO:0043167,GO:0043169,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043233,GO:0043436,GO:0043603,GO:0043933,GO:0044085,GO:0044237,GO:0044238,GO:0044248,GO:0044281,GO:0044297,GO:0044422,GO:0044424,GO:0044428,GO:0044444,GO:0044446,GO:0044452,GO:0044464,GO:0045340,GO:0045454,GO:0046483,GO:0046496,GO:0046688,GO:0046872,GO:0046914,GO:0046983,GO:0048332,GO:0048513,GO:0048518,GO:0048522,GO:0048598,GO:0048608,GO:0048646,GO:0048678,GO:0048729,GO:0048731,GO:0048856,GO:0050664,GO:0050789,GO:0050794,GO:0050896,GO:0051186,GO:0051187,GO:0051259,GO:0051262,GO:0051716,GO:0055086,GO:0055093,GO:0055114,GO:0061458,GO:0065003,GO:0065007,GO:0065008,GO:0070013,GO:0070276,GO:0070482,GO:0070887,GO:0070995,GO:0071241,GO:0071248,GO:0071280,GO:0071453,GO:0071455,GO:0071704,GO:0071840,GO:0072524,GO:0072593,GO:0080090,GO:0097237,GO:0097458,GO:0098623,GO:0098625,GO:0098626,GO:0098754,GO:0098869,GO:1901360,GO:1901564,GO:1901605,GO:1901700,GO:1990748
## 2 -
## 3 <NA>
## 4 <NA>
## 5 <NA>
## 6 <NA>
## GO.description GS.Flat GS.Slope p.GS.Flat
## 1 thioredoxin-disulfide reductase activity 0.57178848 -0.57178848 3.311055e-05
## 2 - -0.29586493 0.29586493 4.589336e-02
## 3 <NA> 0.35628512 -0.35628512 1.508700e-02
## 4 <NA> 0.35628512 -0.35628512 1.508700e-02
## 5 <NA> 0.35628512 -0.35628512 1.508700e-02
## 6 <NA> -0.05455251 0.05455251 7.187880e-01
## p.GS.Slope A.brown p.A.brown A.magenta p.A.magenta A.red
## 1 3.311055e-05 0.7005073 5.973619e-08 -0.3738439 1.048844e-02 0.2901298
## 2 4.589336e-02 -0.4291375 2.921081e-03 0.3115539 3.505853e-02 -0.3452015
## 3 1.508700e-02 0.4914202 5.241968e-04 -0.6288605 2.864308e-06 0.6673892
## 4 1.508700e-02 0.4914202 5.241968e-04 -0.6288605 2.864308e-06 0.6673892
## 5 1.508700e-02 0.4914202 5.241968e-04 -0.6288605 2.864308e-06 0.6673892
## 6 7.187880e-01 0.0972208 5.203783e-01 -0.3252127 2.743096e-02 0.3709019
## p.A.red A.turquoise p.A.turquoise A.purple p.A.purple A.green
## 1 5.047695e-02 -0.43323233 2.634293e-03 0.6984202 6.792759e-08 0.4574538
## 2 1.879503e-02 0.58815287 1.720729e-05 -0.1784560 2.353887e-01 -0.1306835
## 3 4.071016e-07 -0.13892006 3.571825e-01 0.1198762 4.274677e-01 0.2378899
## 4 4.071016e-07 -0.13892006 3.571825e-01 0.1198762 4.274677e-01 0.2378899
## 5 4.071016e-07 -0.13892006 3.571825e-01 0.1198762 4.274677e-01 0.2378899
## 6 1.116224e-02 0.08164806 5.895928e-01 -0.1391597 3.563456e-01 0.1614616
## p.A.green A.lightcyan p.A.lightcyan A.pink p.A.pink A.blue
## 1 0.001391986 -0.3508191 1.682948e-02 0.1707384 2.565893e-01 0.12358439
## 2 0.386672688 0.1196505 4.283449e-01 -0.1522331 3.125037e-01 -0.58598406
## 3 0.111386103 -0.6473842 1.159989e-06 0.7188738 1.835918e-08 0.07448551
## 4 0.111386103 -0.6473842 1.159989e-06 0.7188738 1.835918e-08 0.07448551
## 5 0.111386103 -0.6473842 1.159989e-06 0.7188738 1.835918e-08 0.07448551
## 6 0.283719167 -0.5276145 1.646022e-04 0.6417477 1.537006e-06 -0.02286640
## p.A.blue A.salmon p.A.salmon A.midnightblue p.A.midnightblue
## 1 4.132051e-01 0.1178467 0.435389343 0.2439890 0.10224333
## 2 1.880492e-05 0.1907995 0.204028320 0.2258109 0.13131383
## 3 6.227429e-01 0.4254256 0.003204458 0.2691022 0.07053914
## 4 6.227429e-01 0.4254256 0.003204458 0.2691022 0.07053914
## 5 6.227429e-01 0.4254256 0.003204458 0.2691022 0.07053914
## 6 8.801027e-01 0.2940377 0.047315397 0.2906592 0.05003895
## A.black p.A.black A.cyan p.A.cyan A.yellow p.A.yellow
## 1 -0.28430645 0.05550307 0.04904562 0.7461773 0.05522073 0.7154873547
## 2 -0.18825739 0.21023361 0.07386502 0.6256510 -0.14392338 0.3399558326
## 3 0.09618758 0.52484209 0.16699226 0.2673276 -0.38010677 0.0091694889
## 4 0.09618758 0.52484209 0.16699226 0.2673276 -0.38010677 0.0091694889
## 5 0.09618758 0.52484209 0.16699226 0.2673276 -0.38010677 0.0091694889
## 6 0.02103556 0.88964060 -0.13338389 0.3768501 -0.46998526 0.0009821983
## A.tan p.A.tan Length
## 1 0.2648346 0.075293267 19471
## 2 0.2613466 0.079363532 5941
## 3 -0.2055446 0.170565420 3795
## 4 -0.2055446 0.170565420 3795
## 5 -0.2055446 0.170565420 3795
## 6 -0.3805358 0.009084622 18516
length(unique(biomin_mod$gene_id))
## [1] 65
plyr::count(biomin_mod, "moduleColor")
## moduleColor freq
## 1 black 3
## 2 blue 36
## 3 brown 17
## 4 cyan 2
## 5 green 3
## 6 magenta 1
## 7 pink 7
## 8 red 18
## 9 salmon 6
## 10 tan 3
## 11 turquoise 21
## 12 yellow 10
### Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
ID.vector_biomin <- biomin_mod %>%
pull(gene_id) %>% unique()
##Get a list of GO Terms for the biomineralization genes
GO.terms_biomin <- biomin_mod %>%
dplyr::select(gene_id, GOs) %>% unique() %>% rename(GOs = "GO.terms")
dim(GO.terms_biomin)
## [1] 65 2
dim(GO.terms_biomin %>% filter(!is.na(GO.terms)))
## [1] 22 2
## only 22/65 of these genes have GO annotations!!
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms)
## [1] 9012 2
dim(GO.terms %>% filter(!is.na(GO.terms)))
## [1] 4564 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene_id", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector = as.integer(ALL.vector %in% ID.vector_biomin)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
## Warning in pcls(G): initial point very close to some inequality constraints
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector_biomin, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
head(GO)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 4600 GO:0015701 1.109811e-08 1.0000000 5
## 3485 GO:0008510 8.000832e-07 1.0000000 3
## 4475 GO:0015106 2.473894e-06 1.0000000 3
## 4532 GO:0015301 1.300672e-05 0.9999999 3
## 1416 GO:0004064 9.657516e-05 1.0000000 2
## 14975 GO:0099516 1.405808e-04 0.9999973 3
## numInCat term ontology
## 4600 11 bicarbonate transport BP
## 3485 3 sodium:bicarbonate symporter activity MF
## 4475 4 bicarbonate transmembrane transporter activity MF
## 4532 7 <NA> <NA>
## 1416 2 arylesterase activity MF
## 14975 12 <NA> <NA>
#Filtering for p < 0.05
GO_05 <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
go_results <-read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv")
head(go_results)
## X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0015701 1.109811e-08 1.0000000 5
## 2 2 GO:0044331 4.344468e-04 0.9999983 2
## 3 3 GO:0051453 8.010476e-04 0.9999684 3
## 4 4 GO:0030641 8.796110e-04 0.9999641 3
## 5 5 GO:0006885 1.025213e-03 0.9999556 3
## 6 6 GO:1902476 1.143461e-03 0.9999494 3
## numInCat term ontology
## 1 11 bicarbonate transport BP
## 2 3 cell-cell adhesion mediated by cadherin BP
## 3 29 regulation of intracellular pH BP
## 4 30 regulation of cellular pH BP
## 5 32 regulation of pH BP
## 6 26 chloride transmembrane transport BP
go_results_BP <- go_results %>%
filter(ontology=="BP") %>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 156 8
head(go_results_BP)
## X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0015701 1.109811e-08 1.0000000 5
## 2 2 GO:0044331 4.344468e-04 0.9999983 2
## 3 3 GO:0051453 8.010476e-04 0.9999684 3
## 4 4 GO:0030641 8.796110e-04 0.9999641 3
## 5 5 GO:0006885 1.025213e-03 0.9999556 3
## 6 6 GO:1902476 1.143461e-03 0.9999494 3
## numInCat term ontology
## 1 11 bicarbonate transport BP
## 2 3 cell-cell adhesion mediated by cadherin BP
## 3 29 regulation of intracellular pH BP
## 4 30 regulation of cellular pH BP
## 5 32 regulation of pH BP
## 6 26 chloride transmembrane transport BP
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/biomin_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50))
dev.off()
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
##
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 51 10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>% filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_reduced$ParentTerm))
## [1] 15
The reduced list of terms is 51 terms that falls under 15 parent terms.
#write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/Biomineralization_goterms.csv")
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
Can further reduce like so:
#add vector for terms of interest to reduce number of GO terms - NOT using this to look at individual modules for exploratory purposes
keywords<-c("metabolism", "carbon","bicarbonate", "apoptosis", "death", "symbiosis", "regulation of cell communication", "trans membrane transport", "transmembrane", "organic substance transport", "inorganic substance transport","response to stress", "antioxidant", "calcification","biomineralization", "heat","transporters","proton transport","ion transport","acid-base", "homeostasis")
Search for these keywords in the parent terms
go_results_BP_reduced <- go_results_BP_reduced %>%
filter(grepl(paste(keywords, collapse="|"), ParentTerm))
This is only 19 terms
#write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/Biomineralization_goterms.csv")
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered_keywords.csv")
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_biomin <- ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) +
geom_point(aes(size=over_represented_pvalue)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
GO.plot_biomin
# GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv") %>% dplyr::select(-"X")
# GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv") %>% dplyr::select(-"X")
# GO_05_biomin <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv") %>% dplyr::select(-"X")
GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_up_bymod.csv") %>% dplyr::select(-"X")
GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_bymod.csv") %>% dplyr::select(-"X")
GO_05_biomin <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin.csv") %>% dplyr::select(-"X")
go_results_BP_up <- GO_05_up %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP_up)
## [1] 2772 8
go_results_BP_down <- GO_05_down %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP_down)
## [1] 721 8
go_results_BP_biomin <- GO_05_biomin %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP_biomin)
## [1] 156 7
all_enriched_terms_up_down_biomin <- c(go_results_BP_up$GOterm,go_results_BP_down$GOterm,go_results_BP_biomin$GOterm)
length(all_enriched_terms_up_down_biomin)
## [1] 3649
length(unique(all_enriched_terms_up_down_biomin))
## [1] 3342
By reducing the lists together, we can ensure that each GO term is given the same parent term whether the term was in the Up or Down modules or the biomineralization genes.
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(unique(all_enriched_terms_up_down_biomin),
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
reducedTerms <- reduceSimMatrix(simMatrix,
scores = "size",
threshold=0.7,
orgdb="org.Ce.eg.db")
## No scores provided. Falling back to term's GO size
dim(reducedTerms)
## [1] 2036 10
#keep only the goterms from the reduced list
go_results_BP_up_reduced <- go_results_BP_up %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_up_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_up_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_up_reduced$GOterm))
## [1] 1707
length(unique(go_results_BP_up_reduced$ParentTerm))
## [1] 147
#keep only the goterms from the reduced list
go_results_BP_down_reduced <- go_results_BP_down %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_down_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_down_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_down_reduced$GOterm))
## [1] 433
length(unique(go_results_BP_down_reduced$ParentTerm))
## [1] 85
#keep only the goterms from the reduced list
go_results_BP_biomin_reduced <- go_results_BP_biomin %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_biomin_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_biomin_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_biomin_reduced$GOterm))
## [1] 51
length(unique(go_results_BP_biomin_reduced$ParentTerm))
## [1] 19
#add vector for terms of interest to reduce number of GO terms - NOT using this to look at individual modules for exploratory purposes
keywords<-c("metabolism", "carbon","bicarbonate", "apoptosis", "death", "symbiosis", "regulation of cell communication", "trans membrane transport", "transmembrane", "organic substance transport", "inorganic substance transport","response to stress", "antioxidant", "calcification","biomineralization", "heat","transporters","proton transport","ion transport","acid-base", "homeostasis")
Search for these keywords in the parent terms
go_results_BP_up_reduced_keywords <- go_results_BP_up_reduced %>%
filter(grepl(paste(keywords, collapse="|"), ParentTerm))
length(unique(go_results_BP_up_reduced_keywords$GOterm))
## [1] 82
length(unique(go_results_BP_up_reduced_keywords$ParentTerm))
## [1] 7
go_results_BP_down_reduced_keywords <- go_results_BP_down_reduced %>%
filter(grepl(paste(keywords, collapse="|"), ParentTerm))
length(unique(go_results_BP_down_reduced_keywords$GOterm))
## [1] 15
length(unique(go_results_BP_down_reduced_keywords$ParentTerm))
## [1] 5
go_results_BP_biomin_reduced_keywords <- go_results_BP_biomin_reduced %>%
filter(grepl(paste(keywords, collapse="|"), ParentTerm))
length(unique(go_results_BP_biomin_reduced_keywords$GOterm))
## [1] 24
length(unique(go_results_BP_biomin_reduced_keywords$ParentTerm))
## [1] 3
#write.csv(go_results_BP_up_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
#write.csv(go_results_BP_down_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
#write.csv(go_results_BP_biomin_reduced_keywords, "../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
Won’t move forward with these for now, but they are there if we need further reduced lists.
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.
#cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- go_results_BP_up_reduced
cal_up_terms <- cal_up_terms %>%
mutate(Factor = "Up")
cal_up_terms_parent_nterm <- cal_up_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
mutate(Calcification.direction = "Up") %>% ungroup()
head(cal_up_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0050794 7.575558e-23 1 823
## 2 GO:0065007 9.035173e-22 1 912
## 3 GO:0050789 1.200453e-20 1 859
## 4 GO:0048522 2.736399e-18 1 531
## 5 GO:0048518 2.733516e-17 1 574
## 6 GO:0009987 1.373221e-16 1 1074
## numInCat term ontology Module
## 1 2785 regulation of cellular process BP Blue
## 2 3184 biological regulation BP Blue
## 3 2981 regulation of biological process BP Blue
## 4 1705 positive regulation of cellular process BP Blue
## 5 1888 positive regulation of biological process BP Blue
## 6 4026 cellular process BP Blue
## ParentTerm Factor
## 1 regulation of cellular process Up
## 2 regulation of cellular process Up
## 3 regulation of cellular process Up
## 4 positive regulation of transcription by RNA polymerase II Up
## 5 positive regulation of transcription by RNA polymerase II Up
## 6 cellular process Up
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.
#cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- go_results_BP_down_reduced
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")
cal_down_terms_parent_nterm <- cal_down_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term)) %>%
mutate(Calcification.direction = "Down") %>% ungroup()
head(cal_down_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0002181 1.561228e-16 1 67
## 2 GO:0006412 2.328139e-16 1 123
## 3 GO:0043043 3.132084e-15 1 125
## 4 GO:0006518 5.575430e-15 1 145
## 5 GO:0006614 1.285770e-14 1 48
## 6 GO:0043603 2.192130e-14 1 170
## numInCat term ontology
## 1 90 cytoplasmic translation BP
## 2 219 translation BP
## 3 231 peptide biosynthetic process BP
## 4 285 peptide metabolic process BP
## 5 59 SRP-dependent cotranslational protein targeting to membrane BP
## 6 359 amide metabolic process BP
## Module ParentTerm Factor
## 1 Turquoise translation Down
## 2 Turquoise translation Down
## 3 Turquoise translation Down
## 4 Turquoise translation Down
## 5 Turquoise protein transport Down
## 6 Turquoise translation Down
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the list of biomineralization-related genes. The list has been further reduced by using the package rrvgo.
#cal_biomin_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_biomin_filtered.csv")
cal_biomin_terms <- go_results_BP_biomin_reduced
cal_biomin_terms <- cal_biomin_terms %>% mutate(Factor = "Biomin") %>% mutate(Module = "Biomin")
head(cal_biomin_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0015701 1.109811e-08 1.0000000 5
## 2 GO:0051453 8.010476e-04 0.9999684 3
## 3 GO:0030641 8.796110e-04 0.9999641 3
## 4 GO:0006885 1.025213e-03 0.9999556 3
## 5 GO:1902476 1.143461e-03 0.9999494 3
## 6 GO:0006821 1.203044e-03 0.9999456 3
## numInCat term ontology
## 1 11 bicarbonate transport BP
## 2 29 regulation of intracellular pH BP
## 3 30 regulation of cellular pH BP
## 4 32 regulation of pH BP
## 5 26 chloride transmembrane transport BP
## 6 27 chloride transport BP
## ParentTerm Factor Module
## 1 regulation of dopamine secretion Biomin Biomin
## 2 intracellular calcium ion homeostasis Biomin Biomin
## 3 intracellular calcium ion homeostasis Biomin Biomin
## 4 intracellular calcium ion homeostasis Biomin Biomin
## 5 monoatomic ion transport Biomin Biomin
## 6 monoatomic ion transport Biomin Biomin
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated or downregulated for calcification or in the biomineralization gene list. The list has been further reduced by using the package rrvgo.
all_terms <- rbind(cal_down_terms,cal_up_terms,cal_biomin_terms)
all_terms <- all_terms[,c("Factor","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","ParentTerm","Module")]
all_terms$GOterm<-as.factor(all_terms$GOterm)
dim(all_terms) #785 reduced terms
## [1] 2287 10
length(unique(all_terms$GOterm)) #this represents 780 unique terms between the three lists of reduced terms
## [1] 2036
length(unique(all_terms$ParentTerm)) #this represents 92 unique terms between the three lists of reduced terms
## [1] 152
This is collapsing the list from 2287 rows to 2036, representing the 2036 unique GO terms in the list above. It collapses the information in the columns “ParentTerm” and “Factor” so that for each GO term we get the parent terms associated and if this was enriched in modules upregulated for calcification, downregulated for calcification, or both.
goterms_shared <- all_terms %>%
group_by(GOterm) %>%
dplyr::summarise(
ParentTerm = paste(unique(ParentTerm), collapse = ", "),
Factor = paste(unique(Factor), collapse = ", "),
term = paste(unique(term), collapse = ", ")
)
dim(goterms_shared)
## [1] 2036 4
goterms_shared_full <- goterms_shared %>%
left_join(dplyr::select(all_terms,GOterm, Factor, over_represented_pvalue), by = "GOterm") %>% #select 3 columns GOterm, Factor, over_represented_pvalue from all_terms and left join by GOterm, turns this dataframe from 1817 rows back to the 3453 in all_terms
mutate(pvalue_Up = ifelse(Factor.y == "Up", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Up factor
mutate(pvalue_Down = ifelse(Factor.y == "Down", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Down factor
mutate(pvalue_Biomin = ifelse(Factor.y == "Biomin", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Biomin factor
dplyr::select(-c("over_represented_pvalue", "Factor.y")) %>% #remove the unique columns so that we can collapse the dataframe
group_by(GOterm, ParentTerm, Factor.x, term) %>% #group by the repeated columns for the non-unique GO terms
dplyr::summarize(pvalue_Up = na.omit(pvalue_Up)[1], #carry over the p-value for the term in the up direction, by taking the first non-NA value.
pvalue_Down = na.omit(pvalue_Down)[1], #carry over the p-value for the term in the down direction, by taking the first non-NA value.
pvalue_Biomin = na.omit(pvalue_Biomin)[1]) %>% #carry over the p-value for the term in the biomin list, by taking the first non-NA value.
rename(Factor.x = "Factor") #rename column
## `summarise()` has grouped output by 'GOterm', 'ParentTerm', 'Factor.x'. You can
## override using the `.groups` argument.
write.csv(goterms_shared_full, "../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm_with_Biomin.csv")
goterms_shared_full <- read.csv("../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm_with_Biomin.csv")
goterms_shared_full <- goterms_shared_full %>% group_by(ParentTerm) %>% mutate("N_in_Parent" = n()) %>% ungroup()
goterms_up <- goterms_shared_full %>% dplyr::filter(Factor=="Up")
goterms_down <- goterms_shared_full %>% dplyr::filter(Factor=="Down")
goterms_biomin <- goterms_shared_full %>% dplyr::filter(Factor=="Biomin")
goterms_shared_only <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up")
goterms_shared_only_biomin <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up, Biomin" | Factor=="Up, Biomin" | Factor=="Down, Biomin")
nrow(goterms_up)
## [1] 1555
nrow(goterms_down)
## [1] 294
nrow(goterms_biomin)
## [1] 32
nrow(goterms_shared_only)
## [1] 136
nrow(goterms_shared_only_biomin)
## [1] 19
nrow(goterms_up) + nrow(goterms_down) + nrow(goterms_biomin) + nrow(goterms_shared_only) + nrow(goterms_shared_only_biomin) == nrow(goterms_shared_full)
## [1] TRUE
#how many parent terms in each
length(unique(goterms_up$ParentTerm))
## [1] 144
length(unique(goterms_down$ParentTerm))
## [1] 78
length(unique(goterms_biomin$ParentTerm))
## [1] 16
length(unique(goterms_shared_only$ParentTerm))
## [1] 42
length(unique(goterms_shared_only_biomin$ParentTerm))
## [1] 7
For UNIQUE parent terms (ones that are only in Up or only in Down, whether or not they are in Biomin)
For UNIQUE parent terms (ones that are only in Up or only in Down, whether or not they are in Biomin)
parent_filtered_up <- result_parent_unique %>%
dplyr::filter(Factor == "Up" | Factor=="Up, Biomin")
#dplyr::filter(SharedGOterms>=5)
# parent_filtered_up <- result_parent_unique %>%
# dplyr::filter(Factor=="Up, Biomin")
# #dplyr::filter(SharedGOterms>=5)
parent_filtered_down <- result_parent_unique %>%
dplyr::filter(Factor=="Down" | Factor=="Down, Biomin")
#dplyr::filter(SharedGOterms>=5)
# parent_filtered_down <- result_parent_unique %>%
# dplyr::filter(Factor=="Down, Biomin")
# #dplyr::filter(SharedGOterms>=5)
merged_up <- parent_filtered_up %>%
dplyr::group_by(ParentTerm) %>%
left_join(cal_up_terms_parent_nterm, by = "ParentTerm") %>%
rename(SharedGOterms = "UniqueGOTerms") %>%
mutate(
Has_Biomin = any(Factor == "Up, Biomin"),
Sum_of_UniqueGOterms = sum(UniqueGOTerms, na.rm = TRUE),
Proportion.of.shared.GO.terms.with.biomineralization.genes = ifelse(
Factor %in% c("Up, Biomin"),
UniqueGOTerms / Sum_of_UniqueGOterms,
0
)
) %>%
mutate(Percentage.of.shared.GO.terms.with.biomineralization.genes = Proportion.of.shared.GO.terms.with.biomineralization.genes * 100) %>%
filter((Has_Biomin == TRUE & Factor == "Up, Biomin") | (Has_Biomin == FALSE & Factor == "Up"))
merged_up_clean <- na.omit(merged_up) #remove rows for the non-"Up, Biomin" rows that don't have a percentage
merged_up_clean
## # A tibble: 144 × 9
## # Groups: ParentTerm [144]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 cell cycle Up 57 57 Up
## 2 regulation of ca… Up 46 47 Up
## 3 regulation of pr… Up 44 47 Up
## 4 IRE1-mediated un… Up 41 41 Up
## 5 lipid metabolic … Up 41 41 Up
## 6 protein transport Up 38 41 Up
## 7 ubiquitin-depend… Up 33 39 Up
## 8 intracellular si… Up 31 33 Up
## 9 innate immune re… Up 27 28 Up
## 10 mitochondrion or… Up 25 25 Up
## # ℹ 134 more rows
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
cal_freq_terms_filtered_up <- merged_up_clean %>%
#filter(Number.of.terms>=10) %>%
filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 144 × 9
## # Groups: ParentTerm [144]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 cell cycle Up 57 57 Up
## 2 regulation of ca… Up 46 47 Up
## 3 regulation of pr… Up 44 47 Up
## 4 IRE1-mediated un… Up 41 41 Up
## 5 lipid metabolic … Up 41 41 Up
## 6 protein transport Up 38 41 Up
## 7 ubiquitin-depend… Up 33 39 Up
## 8 intracellular si… Up 31 33 Up
## 9 innate immune re… Up 27 28 Up
## 10 mitochondrion or… Up 25 25 Up
## # ℹ 134 more rows
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
#scale_y_continuous(expression(GO~term~counts),limits=c(0,15))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12)) #making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_colored_unique.pdf", plot=freq_fig_up, dpi=300, height=15, width=10, units="in", limitsize=FALSE)
cal_freq_terms_filtered_down <- merged_down_clean %>%
#filter(Number.of.terms>=10) %>%
filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 79 × 9
## # Groups: ParentTerm [79]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 ubiquitin-depend… Down 20 26 Down
## 2 peptidyl-serine … Down 14 16 Down
## 3 protein transport Down 13 16 Down
## 4 regulation of tr… Down 12 17 Down
## 5 reproduction Down 11 13 Down
## 6 fatty acid metab… Down 10 12 Down
## 7 intracellular si… Down 10 12 Down
## 8 negative regulat… Down 10 12 Down
## 9 regulation of pr… Down 10 13 Down
## 10 ribosome biogene… Down 9 9 Down
## # ℹ 69 more rows
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
#scale_y_continuous(expression(GO~term~counts),limits=c(0,20))+
scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_colored_unique.pdf", plot=freq_fig_down, dpi=300, height=15, width=10, units="in", limitsize=FALSE)
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs
cal_freq_terms_filtered_up <- merged_up_clean %>%
filter(Percentage.of.shared.GO.terms.with.biomineralization.genes>0) %>%
filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 7 × 9
## # Groups: ParentTerm [7]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 intracellular cal… Up, B… 5 24 Up
## 2 monoatomic ion tr… Up, B… 5 13 Up
## 3 proton transmembr… Up, B… 2 8 Up
## 4 ammonium ion meta… Up, B… 1 2 Up
## 5 intracellular tra… Up, B… 1 7 Up
## 6 regulation of dop… Up, B… 1 11 Up
## 7 regulation of pro… Up, B… 1 26 Up
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
#scale_y_continuous(expression(GO~term~counts),limits=c(0,15))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12)) #making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_colored_unique_biomin.pdf", plot=freq_fig_up, dpi=300, height=5, width=10, units="in", limitsize=FALSE)
cal_freq_terms_filtered_down <- merged_down_clean %>%
filter(Percentage.of.shared.GO.terms.with.biomineralization.genes>0) %>%
filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 2 × 9
## # Groups: ParentTerm [2]
## ParentTerm Factor UniqueGOTerms Number.of.terms Calcification.direct…¹
## <chr> <chr> <int> <int> <chr>
## 1 monoatomic ion tr… Down,… 2 4 Down
## 2 regulation of pro… Down,… 1 8 Down
## # ℹ abbreviated name: ¹Calcification.direction
## # ℹ 4 more variables: Has_Biomin <lgl>, Sum_of_UniqueGOterms <int>,
## # Proportion.of.shared.GO.terms.with.biomineralization.genes <dbl>,
## # Percentage.of.shared.GO.terms.with.biomineralization.genes <dbl>
freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Sum_of_UniqueGOterms,x=reorder(ParentTerm, Sum_of_UniqueGOterms), fill=Percentage.of.shared.GO.terms.with.biomineralization.genes,group=1))+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Sum_of_UniqueGOterms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
#scale_y_continuous(expression(GO~term~counts),limits=c(0,20))+
scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_colored_unique_biomin.pdf", plot=freq_fig_down, dpi=300, height=5, width=10, units="in", limitsize=FALSE)
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs